SpatialPCA is a spatially aware dimension reduction method that aims to infer a low dimensional representation of the gene expression data in spatial transcriptomics. SpatialPCA builds upon the probabilistic version of PCA, incorporates localization information as additional input, and uses a kernel matrix to explicitly model the spatial correlation structure across tissue locations.
Load in processed data: gene expression and location
library(SpatialPCA)
library(ggplot2)
library(bluster)
library(ggpubr)
library(slingshot)
load("ST_tumor_example.RData")
ls()
## [1] "expr" "info"
Scale expression of each gene, and prepare data matrices needed in SpatialPCA. Here we set number of low dimensional components to be 20.
expr=scale_expr(expr)
dat = data_prepare_func(expr, info)
Then we select bandwidth for Gaussian kernel.
bandwidth = bandwidth_select(expr, info,"SJ")
K=kernel_build(kernelpara="gaussian", dat$ED2,bandwidth)
Run SpatialPCA functions to extract spatial PCs:
Est_para = SpatialPCA_estimate_parameter(dat_input=dat,PCnum=20)
Est_W = SpatialPCA_estimate_W(Est_para$par, dat,PCnum=20)
Est_Z = SpatialPCA_estimate_Z(Est_para$par,dat,Est_W,PCnum=20)
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
dat$Est_para = Est_para
dat$Est_W = Est_W
dat$Z_spatial = Est_Z$Z_hat
walktrap_cluster_SpatialPCA = walktrap_clustering(knearest = seq(from=30,to=32,by=1), latent_dat=dat$Z_spatial)
## [1] 1
## [1] 2
## [1] 3
cbp_spatialpca <- c( "plum1", "dodgerblue","mediumaquamarine", "palegreen4","chocolate1","lightblue2","#F0E442", "red","#CC79A7","mediumpurple","seagreen1")
p=list()
clusterlabel = walktrap_cluster_SpatialPCA$cluster_label
for(count in 1:length(clusterlabel)){
colnames(info) = c("sdimx","sdimy")
loc1 = info$sdimx
loc2 = info$sdimy
cluster = clusterlabel[[count]]
datt = data.frame(cluster, loc1, loc2)
p[[count]] = ggplot(datt, aes(x = loc1, y = loc2, color = cluster)) +
geom_point( alpha = 1,size=3) +
scale_color_manual(values = cbp_spatialpca)+
theme_void()+
theme(plot.title = element_text(size = 10, face = "bold"),
text = element_text(size = 10),
legend.position = "bottom")
}
print(ggarrange(p[[1]], p[[2]],p[[3]],
ncol = 3, nrow = 1))
Compare with PCA clustering
dat$Z_pca=get_PCA(expr, PCnum=20)
walktrap_cluster_PCA = walktrap_clustering(knearest = seq(from=32,to=34,by=1), latent_dat=dat$Z_pca)
## [1] 1
## [1] 2
## [1] 3
cbp_pca <- c( "plum1", "chocolate1","dodgerblue", "palegreen4", "lightblue2",
"#F0E442","mediumaquamarine", "red","#CC79A7","mediumpurple","seagreen1")
p=list()
clusterlabel = walktrap_cluster_PCA$cluster_label
for(count in 1:length(clusterlabel)){
colnames(info) = c("sdimx","sdimy")
loc1 = info$sdimx
loc2 = info$sdimy
cluster = clusterlabel[[count]]
datt = data.frame(cluster, loc1, loc2)
p[[count]] = ggplot(datt, aes(x = loc1, y = loc2, color = cluster)) +
geom_point( alpha = 1,size=3) +
scale_color_manual(values = cbp_pca)+
theme_void()+
theme(plot.title = element_text(size = 10, face = "bold"),
text = element_text(size = 10),
legend.position = "bottom")
}
print(ggarrange(p[[1]], p[[2]],p[[3]],
ncol = 3, nrow = 1))
Spatial trajectory inference.
meta_data = info
meta_data$SpatialPCA_Walktrap = walktrap_cluster_SpatialPCA$cluster_label[[2]]
sim<- SingleCellExperiment(assays = expr)
reducedDims(sim) <- SimpleList(DRM = t(dat$Z_spatial))
colData(sim)$Walktrap <- factor(meta_data$SpatialPCA_Walktrap)
sim <-slingshot(sim, clusterLabels = 'Walktrap', reducedDim = 'DRM')
summary(sim@colData@listData)
## Length Class Mode
## Walktrap 607 factor numeric
## slingshot 607 PseudotimeOrdering S4
## slingPseudotime_1 607 -none- numeric
## slingPseudotime_2 607 -none- numeric
## slingPseudotime_3 607 -none- numeric
pseudotime_traj1 = sim@colData@listData$slingPseudotime_1
pseudotime_traj2 = sim@colData@listData$slingPseudotime_2
pseudotime_traj3 = sim@colData@listData$slingPseudotime_3
clusterlabels = meta_data$SpatialPCA_Walktrap
gridnum = 10
color_in = c( "plum1", "dodgerblue","mediumaquamarine", "palegreen4", "chocolate1",
"lightblue2","#F0E442", "black","#CC79A7","mediumpurple","seagreen1")
p_traj1 = plot_trajectory(pseudotime_traj1, info,clusterlabels,gridnum,color_in,pointsize=4 ,arrowlength=0.2,arrowsize=1,textsize=15 )
p_traj2 = plot_trajectory(pseudotime_traj2, info,clusterlabels,gridnum,color_in,pointsize=4 ,arrowlength=0.2,arrowsize=1,textsize=15 )
p_traj3 = plot_trajectory(pseudotime_traj3, info,clusterlabels,gridnum,color_in,pointsize=4 ,arrowlength=0.2,arrowsize=1,textsize=15 )
print(ggarrange( p_traj1[[4]],p_traj2[[4]],p_traj3[[4]],p_traj1[[1]],p_traj2[[1]],p_traj3[[1]],
ncol = 3, nrow = 2))
Z_high = high_resolution(info, K, kernelpara="gaussian",ED=dat$ED, est_tau = dat$Est_para$par,est_W = Est_W[[1]] ,est_sigma0 = Est_W[[2]][1,1],est_Z = Est_Z,PCnum=20)
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
#dat$Z_star = Z_high$Z_star
walktrap_SpatialPCA_highresolution = walktrap_clustering(knearest = 84, Z_high$Z_star)
## [1] 1
cbp2 <- c( "plum1", "palegreen4","mediumaquamarine", "dodgerblue", "chocolate1",
"#F0E442","lightblue2", "red","#CC79A7","mediumpurple","seagreen1")
loc1=unlist(Z_high$Location_star[,1])
loc2=unlist(Z_high$Location_star[,2])
clusters = walktrap_SpatialPCA_highresolution$cluster_label[[1]]
p3 = plot_cluster(loc1,loc2,clusters,pointsize=2,text_size=10 ,"",cbp2)
print(p3)